<!DOCTYPE html>
<html lang="zh-cn">
<head>
    <meta charset="utf-8">
    <title>Hadoop案例（一）之日志清洗</title>
    <link type="text/css" rel="stylesheet" href="/bundles/blog-common.css?v=KOZafwuaDasEedEenI5aTy8aXH0epbm6VUJ0v3vsT_Q1"/>
<link id="MainCss" type="text/css" rel="stylesheet" href="/skins/ThinkInside/bundle-ThinkInside.css?v=RRjf6pEarGnbXZ86qxNycPfQivwSKWRa4heYLB15rVE1"/>
<link type="text/css" rel="stylesheet" href="/blog/customcss/428549.css?v=%2fam3bBTkW5NBWhBE%2fD0lcyJv5UM%3d"/>

</head>
<body>
<a name="top"></a>

<div id="page_begin_html"></div><script>load_page_begin_html();</script>

<div id="topics">
	<div class = "post">
		<h1 class = "postTitle">
			<a id="cb_post_title_url" class="postTitle2" href="https://www.cnblogs.com/frankdeng/p/9255766.html">Hadoop案例（一）之日志清洗</a>
		</h1>
		<div class="clear"></div>
		<div class="postBody">
			<div id="cnblogs_post_body" class="blogpost-body"><h2><strong>日志清洗案例</strong></h2>
<h2><strong>一.&nbsp;</strong><strong>简单解析版</strong></h2>
<h3>1<span style="font-family: 宋体;">）需求</span></h3>
<p>去除日志中按空格切分后字段<span style="font-family: 宋体;">个数小于</span><span style="font-family: 宋体;">等于</span>11<span style="font-family: 宋体;">的日志。</span></p>
<h3>2<span style="font-family: 宋体;">）</span><span style="font-family: 宋体;">输入</span>数据</h3>
<div class="cnblogs_code">
<pre><span style="color: #800080;">194.237</span>.<span style="color: #800080;">142.21</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">49</span>:<span style="color: #800080;">18</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /wp-content/uploads/2013/07/rstudio-git3.png HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">304</span> <span style="color: #800080;">0</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/4.0 (compatible;)</span><span style="color: #800000;">"</span>
<span style="color: #800080;">183.49</span>.<span style="color: #800080;">46.228</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">49</span>:<span style="color: #800080;">23</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800080;">400</span> <span style="color: #800080;">0</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span>
<span style="color: #800080;">163.177</span>.<span style="color: #800080;">71.12</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">49</span>:<span style="color: #800080;">33</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">HEAD / HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">20</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">DNSPod-Monitor/1.0</span><span style="color: #800000;">"</span>
<span style="color: #800080;">163.177</span>.<span style="color: #800080;">71.12</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">49</span>:<span style="color: #800080;">36</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">HEAD / HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">20</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">DNSPod-Monitor/1.0</span><span style="color: #800000;">"</span>
<span style="color: #800080;">101.226</span>.<span style="color: #800080;">68.137</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">49</span>:<span style="color: #800080;">42</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">HEAD / HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">20</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">DNSPod-Monitor/1.0</span><span style="color: #800000;">"</span>
<span style="color: #800080;">101.226</span>.<span style="color: #800080;">68.137</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">49</span>:<span style="color: #800080;">45</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">HEAD / HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">20</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">DNSPod-Monitor/1.0</span><span style="color: #800000;">"</span>
<span style="color: #800080;">60.208</span>.<span style="color: #800080;">6.156</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">49</span>:<span style="color: #800080;">48</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /wp-content/uploads/2013/07/rcassandra.png HTTP/1.0</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">185524</span> <span style="color: #800000;">"</span><span style="color: #800000;">http://cos.name/category/software/packages/</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.66 Safari/537.36</span><span style="color: #800000;">"</span>
<span style="color: #800080;">222.68</span>.<span style="color: #800080;">172.190</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">49</span>:<span style="color: #800080;">57</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /images/my.jpg HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">19939</span> <span style="color: #800000;">"</span><span style="color: #800000;">http://www.angularjs.cn/A00n</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.66 Safari/537.36</span><span style="color: #800000;">"</span>
<span style="color: #800080;">222.68</span>.<span style="color: #800080;">172.190</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">50</span>:<span style="color: #800080;">08</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800080;">400</span> <span style="color: #800080;">0</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span>
<span style="color: #800080;">183.195</span>.<span style="color: #800080;">232.138</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">50</span>:<span style="color: #800080;">16</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">HEAD / HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">20</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">DNSPod-Monitor/1.0</span><span style="color: #800000;">"</span>
<span style="color: #800080;">183.195</span>.<span style="color: #800080;">232.138</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">50</span>:<span style="color: #800080;">16</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">HEAD / HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">20</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">DNSPod-Monitor/1.0</span><span style="color: #800000;">"</span>
<span style="color: #800080;">66.249</span>.<span style="color: #800080;">66.84</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">50</span>:<span style="color: #800080;">28</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /page/6/ HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">27777</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)</span><span style="color: #800000;">"</span>
<span style="color: #800080;">221.130</span>.<span style="color: #800080;">41.168</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">50</span>:<span style="color: #800080;">37</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /feed/ HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">304</span> <span style="color: #800080;">0</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.66 Safari/537.36</span><span style="color: #800000;">"</span>
<span style="color: #800080;">157.55</span>.<span style="color: #800080;">35.40</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">51</span>:<span style="color: #800080;">13</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /robots.txt HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">150</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)</span><span style="color: #800000;">"</span>
<span style="color: #800080;">50.116</span>.<span style="color: #800080;">27.194</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">51</span>:<span style="color: #800080;">35</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">POST /wp-cron.php?doing_wp_cron=1379487095.2510800361633300781250 HTTP/1.0</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">0</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">WordPress/3.6; http://blog.fens.me</span><span style="color: #800000;">"</span>
<span style="color: #800080;">58.215</span>.<span style="color: #800080;">204.118</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">51</span>:<span style="color: #800080;">35</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /nodejs-socketio-chat/ HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">10818</span> <span style="color: #800000;">"</span><span style="color: #800000;">http://www.google.com/url?sa=t&amp;rct=j&amp;q=nodejs%20%E5%BC%82%E6%AD%A5%E5%B9%BF%E6%92%AD&amp;source=web&amp;cd=1&amp;cad=rja&amp;ved=0CCgQFjAA&amp;url=%68%74%74%70%3a%2f%2f%62%6c%6f%67%2e%66%65%6e%73%2e%6d%65%2f%6e%6f%64%65%6a%73%2d%73%6f%63%6b%65%74%69%6f%2d%63%68%61%74%2f&amp;ei=rko5UrylAefOiAe7_IGQBw&amp;usg=AFQjCNG6YWoZsJ_bSj8kTnMHcH51hYQkAA&amp;bvm=bv.52288139,d.aGc</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/5.0 (Windows NT 5.1; rv:23.0) Gecko/20100101 Firefox/23.0</span><span style="color: #800000;">"</span>
<span style="color: #800080;">58.215</span>.<span style="color: #800080;">204.118</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">51</span>:<span style="color: #800080;">36</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /wp-includes/js/jquery/jquery-migrate.min.js?ver=1.2.1 HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">304</span> <span style="color: #800080;">0</span> <span style="color: #800000;">"</span><span style="color: #800000;">http://blog.fens.me/nodejs-socketio-chat/</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/5.0 (Windows NT 5.1; rv:23.0) Gecko/20100101 Firefox/23.0</span><span style="color: #800000;">"</span>
<span style="color: #800080;">58.215</span>.<span style="color: #800080;">204.118</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">51</span>:<span style="color: #800080;">35</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /wp-includes/js/jquery/jquery.js?ver=1.10.2 HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">304</span> <span style="color: #800080;">0</span> <span style="color: #800000;">"</span><span style="color: #800000;">http://blog.fens.me/nodejs-socketio-chat/</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/5.0 (Windows NT 5.1; rv:23.0) Gecko/20100101 Firefox/23.0</span><span style="color: #800000;">"</span>
<span style="color: #800080;">58.215</span>.<span style="color: #800080;">204.118</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">51</span>:<span style="color: #800080;">36</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /wp-includes/js/comment-reply.min.js?ver=3.6 HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">304</span> <span style="color: #800080;">0</span> <span style="color: #800000;">"</span><span style="color: #800000;">http://blog.fens.me/nodejs-socketio-chat/</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/5.0 (Windows NT 5.1; rv:23.0) Gecko/20100101 Firefox/23.0</span><span style="color: #800000;">"</span>
<span style="color: #800080;">58.215</span>.<span style="color: #800080;">204.118</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">51</span>:<span style="color: #800080;">36</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /wp-content/uploads/2013/08/chat.png HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">48968</span> <span style="color: #800000;">"</span><span style="color: #800000;">http://blog.fens.me/nodejs-socketio-chat/</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/5.0 (Windows NT 5.1; rv:23.0) Gecko/20100101 Firefox/23.0</span><span style="color: #800000;">"</span>
<span style="color: #800080;">58.215</span>.<span style="color: #800080;">204.118</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">51</span>:<span style="color: #800080;">36</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /wp-content/uploads/2013/08/chat2.png HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">59852</span> <span style="color: #800000;">"</span><span style="color: #800000;">http://blog.fens.me/nodejs-socketio-chat/</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/5.0 (Windows NT 5.1; rv:23.0) Gecko/20100101 Firefox/23.0</span><span style="color: #800000;">"</span>
<span style="color: #800080;">58.215</span>.<span style="color: #800080;">204.118</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">51</span>:<span style="color: #800080;">37</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /wp-content/uploads/2013/08/socketio.png HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">80493</span> <span style="color: #800000;">"</span><span style="color: #800000;">http://blog.fens.me/nodejs-socketio-chat/</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/5.0 (Windows NT 5.1; rv:23.0) Gecko/20100101 Firefox/23.0</span><span style="color: #800000;">"</span>
<span style="color: #800080;">58.248</span>.<span style="color: #800080;">178.212</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">51</span>:<span style="color: #800080;">37</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /nodejs-grunt-intro/ HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">51770</span> <span style="color: #800000;">"</span><span style="color: #800000;">http://blog.fens.me/series-nodejs/</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; MDDR; InfoPath.2; .NET4.0C)</span><span style="color: #800000;">"</span>
<span style="color: #800080;">58.248</span>.<span style="color: #800080;">178.212</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">51</span>:<span style="color: #800080;">40</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /wp-includes/js/jquery/jquery-migrate.min.js?ver=1.2.1 HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">7200</span> <span style="color: #800000;">"</span><span style="color: #800000;">http://blog.fens.me/nodejs-grunt-intro/</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; MDDR; InfoPath.2; .NET4.0C)</span><span style="color: #800000;">"</span></pre>
</div>
<h3>3）实现代码</h3>
<p>（1）编写LogMapper</p>
<div class="cnblogs_code">
<pre><span style="color: #000000;">package com.xyg.mapreduce.weblog;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

</span><span style="color: #0000ff;">public</span> <span style="color: #0000ff;">class</span> LogMapper extends Mapper&lt;LongWritable, Text, Text, NullWritable&gt;<span style="color: #000000;">{
    
    Text k </span>= <span style="color: #0000ff;">new</span><span style="color: #000000;"> Text();
    
    @Override
    </span><span style="color: #0000ff;">protected</span> <span style="color: #0000ff;">void</span><span style="color: #000000;"> map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        
        </span><span style="color: #008000;">//</span><span style="color: #008000;"> 1 获取1行数据</span>
        String line =<span style="color: #000000;"> value.toString();        
        </span><span style="color: #008000;">//</span><span style="color: #008000;"> 2 解析日志</span>
        boolean result =<span style="color: #000000;"> parseLog(line,context);       
        </span><span style="color: #008000;">//</span><span style="color: #008000;"> 3 日志不合法退出</span>
        <span style="color: #0000ff;">if</span> (!<span style="color: #000000;">result) {
            </span><span style="color: #0000ff;">return</span><span style="color: #000000;">;
        }     
        </span><span style="color: #008000;">//</span><span style="color: #008000;"> 4 设置key</span>
        k.<span style="color: #0000ff;">set</span><span style="color: #000000;">(line);   
        </span><span style="color: #008000;">//</span><span style="color: #008000;"> 5 写出数据</span>
        context.write(k, NullWritable.<span style="color: #0000ff;">get</span><span style="color: #000000;">());
    }
    </span><span style="color: #008000;">//</span><span style="color: #008000;"> 2 解析日志</span>
    <span style="color: #0000ff;">private</span><span style="color: #000000;"> boolean parseLog(String line, Context context) {
        </span><span style="color: #008000;">//</span><span style="color: #008000;"> 1 截取</span>
        String[] fields = line.split(<span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #000000;">);
        
        </span><span style="color: #008000;">//</span><span style="color: #008000;"> 2 字段个数大于11的日志为合法</span>
        <span style="color: #0000ff;">if</span> (fields.length &gt; <span style="color: #800080;">11</span><span style="color: #000000;">) {
            </span><span style="color: #008000;">//</span><span style="color: #008000;"> 自定义计数器：累计合法日志条数</span>
            context.getCounter(<span style="color: #800000;">"</span><span style="color: #800000;">map</span><span style="color: #800000;">"</span>, <span style="color: #800000;">"</span><span style="color: #800000;">true</span><span style="color: #800000;">"</span>).increment(<span style="color: #800080;">1</span><span style="color: #000000;">);
            </span><span style="color: #0000ff;">return</span> <span style="color: #0000ff;">true</span><span style="color: #000000;">;
        }</span><span style="color: #0000ff;">else</span><span style="color: #000000;"> {
            context.getCounter(</span><span style="color: #800000;">"</span><span style="color: #800000;">map</span><span style="color: #800000;">"</span>, <span style="color: #800000;">"</span><span style="color: #800000;">false</span><span style="color: #800000;">"</span>).increment(<span style="color: #800080;">1</span><span style="color: #000000;">);
            </span><span style="color: #0000ff;">return</span> <span style="color: #0000ff;">false</span><span style="color: #000000;">;
        }
    }
}</span></pre>
</div>
<p>（2）编写LogDriver</p>
<div class="cnblogs_code">
<pre><span style="color: #000000;">package com.xyg.mapreduce.weblog;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

</span><span style="color: #0000ff;">public</span> <span style="color: #0000ff;">class</span><span style="color: #000000;"> LogDriver {

</span><span style="color: #0000ff;">    public</span> <span style="color: #0000ff;">static</span> <span style="color: #0000ff;">void</span><span style="color: #000000;"> main(String[] args) throws Exception {
        args </span>= <span style="color: #0000ff;">new</span> String[] { <span style="color: #800000;">"</span><span style="color: #800000;">e:/inputlog</span><span style="color: #800000;">"</span>, <span style="color: #800000;">"</span><span style="color: #800000;">e:/output1</span><span style="color: #800000;">"</span><span style="color: #000000;"> };
</span><span style="color: #008000;">    //</span><span style="color: #008000;"> 1 获取job信息</span>
    Configuration conf = <span style="color: #0000ff;">new</span><span style="color: #000000;"> Configuration();
    Job job </span>=<span style="color: #000000;"> Job.getInstance(conf);
</span><span style="color: #008000;">    //</span><span style="color: #008000;"> 2 加载jar包</span>
    job.setJarByClass(LogDriver.<span style="color: #0000ff;">class</span><span style="color: #000000;">);
</span><span style="color: #008000;">    //</span><span style="color: #008000;"> 3 关联map</span>
    job.setMapperClass(LogMapper.<span style="color: #0000ff;">class</span><span style="color: #000000;">);
</span><span style="color: #008000;">    //</span><span style="color: #008000;"> 4 设置最终输出类型</span>
    job.setOutputKeyClass(Text.<span style="color: #0000ff;">class</span><span style="color: #000000;">);
    job.setOutputValueClass(NullWritable.</span><span style="color: #0000ff;">class</span><span style="color: #000000;">);
</span><span style="color: #008000;">    //</span><span style="color: #008000;"> 5 设置输入和输出路径</span>
    FileInputFormat.setInputPaths(job, <span style="color: #0000ff;">new</span> Path(args[<span style="color: #800080;">0</span><span style="color: #000000;">]));
    FileOutputFormat.setOutputPath(job, </span><span style="color: #0000ff;">new</span> Path(args[<span style="color: #800080;">1</span><span style="color: #000000;">]));
</span><span style="color: #008000;">    //</span><span style="color: #008000;"> 6 提交</span>
    job.waitForCompletion(<span style="color: #0000ff;">true</span><span style="color: #000000;">);
    }
}</span></pre>
</div>
<h2><strong>二.&nbsp;</strong><strong>复杂解析版</strong></h2>
<h3>1）需求</h3>
<p>对web访问日志中的各字段进行识别和切分</p>
<p>去除日志中不合法的记录</p>
<p>根据统计需求，生成各类访问请求过滤数据</p>
<h3>2）输入数据</h3>
<div class="cnblogs_code">
<pre><span style="color: #800080;">194.237</span>.<span style="color: #800080;">142.21</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">49</span>:<span style="color: #800080;">18</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /wp-content/uploads/2013/07/rstudio-git3.png HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">304</span> <span style="color: #800080;">0</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/4.0 (compatible;)</span><span style="color: #800000;">"</span>
<span style="color: #800080;">183.49</span>.<span style="color: #800080;">46.228</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">49</span>:<span style="color: #800080;">23</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800080;">400</span> <span style="color: #800080;">0</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span>
<span style="color: #800080;">163.177</span>.<span style="color: #800080;">71.12</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">49</span>:<span style="color: #800080;">33</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">HEAD / HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">20</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">DNSPod-Monitor/1.0</span><span style="color: #800000;">"</span>
<span style="color: #800080;">163.177</span>.<span style="color: #800080;">71.12</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">49</span>:<span style="color: #800080;">36</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">HEAD / HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">20</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">DNSPod-Monitor/1.0</span><span style="color: #800000;">"</span>
<span style="color: #800080;">101.226</span>.<span style="color: #800080;">68.137</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">49</span>:<span style="color: #800080;">42</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">HEAD / HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">20</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">DNSPod-Monitor/1.0</span><span style="color: #800000;">"</span>
<span style="color: #800080;">101.226</span>.<span style="color: #800080;">68.137</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">49</span>:<span style="color: #800080;">45</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">HEAD / HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">20</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">DNSPod-Monitor/1.0</span><span style="color: #800000;">"</span>
<span style="color: #800080;">60.208</span>.<span style="color: #800080;">6.156</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">49</span>:<span style="color: #800080;">48</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /wp-content/uploads/2013/07/rcassandra.png HTTP/1.0</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">185524</span> <span style="color: #800000;">"</span><span style="color: #800000;">http://cos.name/category/software/packages/</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.66 Safari/537.36</span><span style="color: #800000;">"</span>
<span style="color: #800080;">222.68</span>.<span style="color: #800080;">172.190</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">49</span>:<span style="color: #800080;">57</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /images/my.jpg HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">19939</span> <span style="color: #800000;">"</span><span style="color: #800000;">http://www.angularjs.cn/A00n</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.66 Safari/537.36</span><span style="color: #800000;">"</span>
<span style="color: #800080;">222.68</span>.<span style="color: #800080;">172.190</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">50</span>:<span style="color: #800080;">08</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800080;">400</span> <span style="color: #800080;">0</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span>
<span style="color: #800080;">183.195</span>.<span style="color: #800080;">232.138</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">50</span>:<span style="color: #800080;">16</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">HEAD / HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">20</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">DNSPod-Monitor/1.0</span><span style="color: #800000;">"</span>
<span style="color: #800080;">183.195</span>.<span style="color: #800080;">232.138</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">50</span>:<span style="color: #800080;">16</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">HEAD / HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">20</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">DNSPod-Monitor/1.0</span><span style="color: #800000;">"</span>
<span style="color: #800080;">66.249</span>.<span style="color: #800080;">66.84</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">50</span>:<span style="color: #800080;">28</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /page/6/ HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">27777</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)</span><span style="color: #800000;">"</span>
<span style="color: #800080;">221.130</span>.<span style="color: #800080;">41.168</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">50</span>:<span style="color: #800080;">37</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /feed/ HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">304</span> <span style="color: #800080;">0</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.66 Safari/537.36</span><span style="color: #800000;">"</span>
<span style="color: #800080;">157.55</span>.<span style="color: #800080;">35.40</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">51</span>:<span style="color: #800080;">13</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /robots.txt HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">150</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)</span><span style="color: #800000;">"</span>
<span style="color: #800080;">50.116</span>.<span style="color: #800080;">27.194</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">51</span>:<span style="color: #800080;">35</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">POST /wp-cron.php?doing_wp_cron=1379487095.2510800361633300781250 HTTP/1.0</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">0</span> <span style="color: #800000;">"</span><span style="color: #800000;">-</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">WordPress/3.6; http://blog.fens.me</span><span style="color: #800000;">"</span>
<span style="color: #800080;">58.215</span>.<span style="color: #800080;">204.118</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">51</span>:<span style="color: #800080;">35</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /nodejs-socketio-chat/ HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">10818</span> <span style="color: #800000;">"</span><span style="color: #800000;">http://www.google.com/url?sa=t&amp;rct=j&amp;q=nodejs%20%E5%BC%82%E6%AD%A5%E5%B9%BF%E6%92%AD&amp;source=web&amp;cd=1&amp;cad=rja&amp;ved=0CCgQFjAA&amp;url=%68%74%74%70%3a%2f%2f%62%6c%6f%67%2e%66%65%6e%73%2e%6d%65%2f%6e%6f%64%65%6a%73%2d%73%6f%63%6b%65%74%69%6f%2d%63%68%61%74%2f&amp;ei=rko5UrylAefOiAe7_IGQBw&amp;usg=AFQjCNG6YWoZsJ_bSj8kTnMHcH51hYQkAA&amp;bvm=bv.52288139,d.aGc</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/5.0 (Windows NT 5.1; rv:23.0) Gecko/20100101 Firefox/23.0</span><span style="color: #800000;">"</span>
<span style="color: #800080;">58.215</span>.<span style="color: #800080;">204.118</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">51</span>:<span style="color: #800080;">36</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /wp-includes/js/jquery/jquery-migrate.min.js?ver=1.2.1 HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">304</span> <span style="color: #800080;">0</span> <span style="color: #800000;">"</span><span style="color: #800000;">http://blog.fens.me/nodejs-socketio-chat/</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/5.0 (Windows NT 5.1; rv:23.0) Gecko/20100101 Firefox/23.0</span><span style="color: #800000;">"</span>
<span style="color: #800080;">58.215</span>.<span style="color: #800080;">204.118</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">51</span>:<span style="color: #800080;">35</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /wp-includes/js/jquery/jquery.js?ver=1.10.2 HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">304</span> <span style="color: #800080;">0</span> <span style="color: #800000;">"</span><span style="color: #800000;">http://blog.fens.me/nodejs-socketio-chat/</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/5.0 (Windows NT 5.1; rv:23.0) Gecko/20100101 Firefox/23.0</span><span style="color: #800000;">"</span>
<span style="color: #800080;">58.215</span>.<span style="color: #800080;">204.118</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">51</span>:<span style="color: #800080;">36</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /wp-includes/js/comment-reply.min.js?ver=3.6 HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">304</span> <span style="color: #800080;">0</span> <span style="color: #800000;">"</span><span style="color: #800000;">http://blog.fens.me/nodejs-socketio-chat/</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/5.0 (Windows NT 5.1; rv:23.0) Gecko/20100101 Firefox/23.0</span><span style="color: #800000;">"</span>
<span style="color: #800080;">58.215</span>.<span style="color: #800080;">204.118</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">51</span>:<span style="color: #800080;">36</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /wp-content/uploads/2013/08/chat.png HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">48968</span> <span style="color: #800000;">"</span><span style="color: #800000;">http://blog.fens.me/nodejs-socketio-chat/</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/5.0 (Windows NT 5.1; rv:23.0) Gecko/20100101 Firefox/23.0</span><span style="color: #800000;">"</span>
<span style="color: #800080;">58.215</span>.<span style="color: #800080;">204.118</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">51</span>:<span style="color: #800080;">36</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /wp-content/uploads/2013/08/chat2.png HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">59852</span> <span style="color: #800000;">"</span><span style="color: #800000;">http://blog.fens.me/nodejs-socketio-chat/</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/5.0 (Windows NT 5.1; rv:23.0) Gecko/20100101 Firefox/23.0</span><span style="color: #800000;">"</span>
<span style="color: #800080;">58.215</span>.<span style="color: #800080;">204.118</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">51</span>:<span style="color: #800080;">37</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /wp-content/uploads/2013/08/socketio.png HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">80493</span> <span style="color: #800000;">"</span><span style="color: #800000;">http://blog.fens.me/nodejs-socketio-chat/</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/5.0 (Windows NT 5.1; rv:23.0) Gecko/20100101 Firefox/23.0</span><span style="color: #800000;">"</span>
<span style="color: #800080;">58.248</span>.<span style="color: #800080;">178.212</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">51</span>:<span style="color: #800080;">37</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /nodejs-grunt-intro/ HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">51770</span> <span style="color: #800000;">"</span><span style="color: #800000;">http://blog.fens.me/series-nodejs/</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; MDDR; InfoPath.2; .NET4.0C)</span><span style="color: #800000;">"</span>
<span style="color: #800080;">58.248</span>.<span style="color: #800080;">178.212</span> - - [<span style="color: #800080;">18</span>/Sep/<span style="color: #800080;">2013</span>:<span style="color: #800080;">06</span>:<span style="color: #800080;">51</span>:<span style="color: #800080;">40</span> +<span style="color: #800080;">0000</span>] <span style="color: #800000;">"</span><span style="color: #800000;">GET /wp-includes/js/jquery/jquery-migrate.min.js?ver=1.2.1 HTTP/1.1</span><span style="color: #800000;">"</span> <span style="color: #800080;">200</span> <span style="color: #800080;">7200</span> <span style="color: #800000;">"</span><span style="color: #800000;">http://blog.fens.me/nodejs-grunt-intro/</span><span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #800000;">Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; MDDR; InfoPath.2; .NET4.0C)</span><span style="color: #800000;">"</span></pre>
</div>
<h3>3）实现代码</h3>
<p>（1）定义一个bean，用来记录日志数据中的各数据字段</p>
<div class="cnblogs_code">
<pre><span style="color: #000000;">package com.xyg.mapreduce.log;

</span><span style="color: #0000ff;">public</span> <span style="color: #0000ff;">class</span><span style="color: #000000;"> LogBean {
    </span><span style="color: #0000ff;">private</span> String remote_addr;<span style="color: #008000;">//</span><span style="color: #008000;"> 记录客户端的ip地址</span>
    <span style="color: #0000ff;">private</span> String remote_user;<span style="color: #008000;">//</span><span style="color: #008000;"> 记录客户端用户名称,忽略属性"-"</span>
    <span style="color: #0000ff;">private</span> String time_local;<span style="color: #008000;">//</span><span style="color: #008000;"> 记录访问时间与时区</span>
    <span style="color: #0000ff;">private</span> String request;<span style="color: #008000;">//</span><span style="color: #008000;"> 记录请求的url与http协议</span>
    <span style="color: #0000ff;">private</span> String status;<span style="color: #008000;">//</span><span style="color: #008000;"> 记录请求状态；成功是200</span>
    <span style="color: #0000ff;">private</span> String body_bytes_sent;<span style="color: #008000;">//</span><span style="color: #008000;"> 记录发送给客户端文件主体内容大小</span>
    <span style="color: #0000ff;">private</span> String http_referer;<span style="color: #008000;">//</span><span style="color: #008000;"> 用来记录从哪个页面链接访问过来的</span>
    <span style="color: #0000ff;">private</span> String http_user_agent;<span style="color: #008000;">//</span><span style="color: #008000;"> 记录客户浏览器的相关信息</span>

    <span style="color: #0000ff;">private</span> boolean valid = <span style="color: #0000ff;">true</span>;<span style="color: #008000;">//</span><span style="color: #008000;"> 判断数据是否合法</span>

    <span style="color: #0000ff;">public</span><span style="color: #000000;"> String getRemote_addr() {
        </span><span style="color: #0000ff;">return</span><span style="color: #000000;"> remote_addr;
    }

    </span><span style="color: #0000ff;">public</span> <span style="color: #0000ff;">void</span><span style="color: #000000;"> setRemote_addr(String remote_addr) {
        </span><span style="color: #0000ff;">this</span>.remote_addr =<span style="color: #000000;"> remote_addr;
    }

    </span><span style="color: #0000ff;">public</span><span style="color: #000000;"> String getRemote_user() {
        </span><span style="color: #0000ff;">return</span><span style="color: #000000;"> remote_user;
    }

    </span><span style="color: #0000ff;">public</span> <span style="color: #0000ff;">void</span><span style="color: #000000;"> setRemote_user(String remote_user) {
        </span><span style="color: #0000ff;">this</span>.remote_user =<span style="color: #000000;"> remote_user;
    }

    </span><span style="color: #0000ff;">public</span><span style="color: #000000;"> String getTime_local() {
        </span><span style="color: #0000ff;">return</span><span style="color: #000000;"> time_local;
    }

    </span><span style="color: #0000ff;">public</span> <span style="color: #0000ff;">void</span><span style="color: #000000;"> setTime_local(String time_local) {
        </span><span style="color: #0000ff;">this</span>.time_local =<span style="color: #000000;"> time_local;
    }

    </span><span style="color: #0000ff;">public</span><span style="color: #000000;"> String getRequest() {
        </span><span style="color: #0000ff;">return</span><span style="color: #000000;"> request;
    }

    </span><span style="color: #0000ff;">public</span> <span style="color: #0000ff;">void</span><span style="color: #000000;"> setRequest(String request) {
        </span><span style="color: #0000ff;">this</span>.request =<span style="color: #000000;"> request;
    }

    </span><span style="color: #0000ff;">public</span><span style="color: #000000;"> String getStatus() {
        </span><span style="color: #0000ff;">return</span><span style="color: #000000;"> status;
    }

    </span><span style="color: #0000ff;">public</span> <span style="color: #0000ff;">void</span><span style="color: #000000;"> setStatus(String status) {
        </span><span style="color: #0000ff;">this</span>.status =<span style="color: #000000;"> status;
    }

    </span><span style="color: #0000ff;">public</span><span style="color: #000000;"> String getBody_bytes_sent() {
        </span><span style="color: #0000ff;">return</span><span style="color: #000000;"> body_bytes_sent;
    }

    </span><span style="color: #0000ff;">public</span> <span style="color: #0000ff;">void</span><span style="color: #000000;"> setBody_bytes_sent(String body_bytes_sent) {
        </span><span style="color: #0000ff;">this</span>.body_bytes_sent =<span style="color: #000000;"> body_bytes_sent;
    }

    </span><span style="color: #0000ff;">public</span><span style="color: #000000;"> String getHttp_referer() {
        </span><span style="color: #0000ff;">return</span><span style="color: #000000;"> http_referer;
    }

    </span><span style="color: #0000ff;">public</span> <span style="color: #0000ff;">void</span><span style="color: #000000;"> setHttp_referer(String http_referer) {
        </span><span style="color: #0000ff;">this</span>.http_referer =<span style="color: #000000;"> http_referer;
    }

    </span><span style="color: #0000ff;">public</span><span style="color: #000000;"> String getHttp_user_agent() {
        </span><span style="color: #0000ff;">return</span><span style="color: #000000;"> http_user_agent;
    }

    </span><span style="color: #0000ff;">public</span> <span style="color: #0000ff;">void</span><span style="color: #000000;"> setHttp_user_agent(String http_user_agent) {
        </span><span style="color: #0000ff;">this</span>.http_user_agent =<span style="color: #000000;"> http_user_agent;
    }

    </span><span style="color: #0000ff;">public</span><span style="color: #000000;"> boolean isValid() {
        </span><span style="color: #0000ff;">return</span><span style="color: #000000;"> valid;
    }

    </span><span style="color: #0000ff;">public</span> <span style="color: #0000ff;">void</span><span style="color: #000000;"> setValid(boolean valid) {
        </span><span style="color: #0000ff;">this</span>.valid =<span style="color: #000000;"> valid;
    }

    @Override
    </span><span style="color: #0000ff;">public</span><span style="color: #000000;"> String toString() {
        StringBuilder sb </span>= <span style="color: #0000ff;">new</span><span style="color: #000000;"> StringBuilder();
        sb.append(</span><span style="color: #0000ff;">this</span><span style="color: #000000;">.valid);
        sb.append(</span><span style="color: #800000;">"</span><span style="color: #800000;">\001</span><span style="color: #800000;">"</span>).append(<span style="color: #0000ff;">this</span><span style="color: #000000;">.remote_addr);
        sb.append(</span><span style="color: #800000;">"</span><span style="color: #800000;">\001</span><span style="color: #800000;">"</span>).append(<span style="color: #0000ff;">this</span><span style="color: #000000;">.remote_user);
        sb.append(</span><span style="color: #800000;">"</span><span style="color: #800000;">\001</span><span style="color: #800000;">"</span>).append(<span style="color: #0000ff;">this</span><span style="color: #000000;">.time_local);
        sb.append(</span><span style="color: #800000;">"</span><span style="color: #800000;">\001</span><span style="color: #800000;">"</span>).append(<span style="color: #0000ff;">this</span><span style="color: #000000;">.request);
        sb.append(</span><span style="color: #800000;">"</span><span style="color: #800000;">\001</span><span style="color: #800000;">"</span>).append(<span style="color: #0000ff;">this</span><span style="color: #000000;">.status);
        sb.append(</span><span style="color: #800000;">"</span><span style="color: #800000;">\001</span><span style="color: #800000;">"</span>).append(<span style="color: #0000ff;">this</span><span style="color: #000000;">.body_bytes_sent);
        sb.append(</span><span style="color: #800000;">"</span><span style="color: #800000;">\001</span><span style="color: #800000;">"</span>).append(<span style="color: #0000ff;">this</span><span style="color: #000000;">.http_referer);
        sb.append(</span><span style="color: #800000;">"</span><span style="color: #800000;">\001</span><span style="color: #800000;">"</span>).append(<span style="color: #0000ff;">this</span><span style="color: #000000;">.http_user_agent);
        
        </span><span style="color: #0000ff;">return</span><span style="color: #000000;"> sb.toString();
    }
}</span></pre>
</div>
<p>（2）编写LogMapper程序</p>
<div class="cnblogs_code">
<pre><span style="color: #000000;">package com.xyg.mapreduce.log;

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

</span><span style="color: #0000ff;">public</span> <span style="color: #0000ff;">class</span> LogMapper extends Mapper&lt;LongWritable, Text, Text, NullWritable&gt;<span style="color: #000000;">{
    Text k </span>= <span style="color: #0000ff;">new</span><span style="color: #000000;"> Text();
    
    @Override
    </span><span style="color: #0000ff;">protected</span> <span style="color: #0000ff;">void</span><span style="color: #000000;"> map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        </span><span style="color: #008000;">//</span><span style="color: #008000;"> 1 获取1行</span>
        String line =<span style="color: #000000;"> value.toString();        
        </span><span style="color: #008000;">//</span><span style="color: #008000;"> 2 解析日志是否合法</span>
        LogBean bean =<span style="color: #000000;"> pressLog(line);        
        </span><span style="color: #0000ff;">if</span> (!<span style="color: #000000;">bean.isValid()) {
            </span><span style="color: #0000ff;">return</span><span style="color: #000000;">;
        }        
        k.</span><span style="color: #0000ff;">set</span><span style="color: #000000;">(bean.toString());        
        </span><span style="color: #008000;">//</span><span style="color: #008000;"> 3 输出</span>
        context.write(k, NullWritable.<span style="color: #0000ff;">get</span><span style="color: #000000;">());
    }

    </span><span style="color: #008000;">//</span><span style="color: #008000;"> 解析日志</span>
    <span style="color: #0000ff;">private</span><span style="color: #000000;"> LogBean pressLog(String line) {
        LogBean logBean </span>= <span style="color: #0000ff;">new</span><span style="color: #000000;"> LogBean();
        </span><span style="color: #008000;">//</span><span style="color: #008000;"> 1 截取</span>
        String[] fields = line.split(<span style="color: #800000;">"</span> <span style="color: #800000;">"</span><span style="color: #000000;">);   
        </span><span style="color: #0000ff;">if</span> (fields.length &gt; <span style="color: #800080;">11</span><span style="color: #000000;">) {
            </span><span style="color: #008000;">//</span><span style="color: #008000;"> 2 封装数据</span>
            logBean.setRemote_addr(fields[<span style="color: #800080;">0</span><span style="color: #000000;">]);
            logBean.setRemote_user(fields[</span><span style="color: #800080;">1</span><span style="color: #000000;">]);
            logBean.setTime_local(fields[</span><span style="color: #800080;">3</span>].substring(<span style="color: #800080;">1</span><span style="color: #000000;">));
            logBean.setRequest(fields[</span><span style="color: #800080;">6</span><span style="color: #000000;">]);
            logBean.setStatus(fields[</span><span style="color: #800080;">8</span><span style="color: #000000;">]);
            logBean.setBody_bytes_sent(fields[</span><span style="color: #800080;">9</span><span style="color: #000000;">]);
            logBean.setHttp_referer(fields[</span><span style="color: #800080;">10</span><span style="color: #000000;">]);
            
            </span><span style="color: #0000ff;">if</span> (fields.length &gt; <span style="color: #800080;">12</span><span style="color: #000000;">) {
                logBean.setHttp_user_agent(fields[</span><span style="color: #800080;">11</span>] + <span style="color: #800000;">"</span> <span style="color: #800000;">"</span>+ fields[<span style="color: #800080;">12</span><span style="color: #000000;">]);
            }</span><span style="color: #0000ff;">else</span><span style="color: #000000;"> {
                logBean.setHttp_user_agent(fields[</span><span style="color: #800080;">11</span><span style="color: #000000;">]);
            }           
            </span><span style="color: #008000;">//</span><span style="color: #008000;"> 大于等于400，HTTP错误</span>
            <span style="color: #0000ff;">if</span> (Integer.parseInt(logBean.getStatus()) &gt;= <span style="color: #800080;">400</span><span style="color: #000000;">) {
                logBean.setValid(</span><span style="color: #0000ff;">false</span><span style="color: #000000;">);
            }
        }</span><span style="color: #0000ff;">else</span><span style="color: #000000;"> {
            logBean.setValid(</span><span style="color: #0000ff;">false</span><span style="color: #000000;">);
        }
        </span><span style="color: #0000ff;">return</span><span style="color: #000000;"> logBean;
    }
}</span></pre>
</div>
<p>（3）编写LogDriver程序</p>
<div class="cnblogs_code">
<pre><span style="color: #000000;">package com.xyg.mapreduce.log;<br />
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

</span><span style="color: #0000ff;">public</span> <span style="color: #0000ff;">class</span><span style="color: #000000;"> LogDriver {
    </span><span style="color: #0000ff;">public</span> <span style="color: #0000ff;">static</span> <span style="color: #0000ff;">void</span><span style="color: #000000;"> main(String[] args) throws Exception {
        </span><span style="color: #008000;">//</span><span style="color: #008000;"> 1 获取job信息</span>
        Configuration conf = <span style="color: #0000ff;">new</span><span style="color: #000000;"> Configuration();
        Job job </span>=<span style="color: #000000;"> Job.getInstance(conf);
        </span><span style="color: #008000;">//</span><span style="color: #008000;"> 2 加载jar包</span>
        job.setJarByClass(LogDriver.<span style="color: #0000ff;">class</span><span style="color: #000000;">);
        </span><span style="color: #008000;">//</span><span style="color: #008000;"> 3 关联map</span>
        job.setMapperClass(LogMapper.<span style="color: #0000ff;">class</span><span style="color: #000000;">);
        </span><span style="color: #008000;">//</span><span style="color: #008000;"> 4 设置最终输出类型</span>
        job.setOutputKeyClass(Text.<span style="color: #0000ff;">class</span><span style="color: #000000;">);
        job.setOutputValueClass(NullWritable.</span><span style="color: #0000ff;">class</span><span style="color: #000000;">);
        </span><span style="color: #008000;">//</span><span style="color: #008000;"> 5 设置输入和输出路径</span>
        FileInputFormat.setInputPaths(job, <span style="color: #0000ff;">new</span> Path(args[<span style="color: #800080;">0</span><span style="color: #000000;">]));
        FileOutputFormat.setOutputPath(job, </span><span style="color: #0000ff;">new</span> Path(args[<span style="color: #800080;">1</span><span style="color: #000000;">]));
        </span><span style="color: #008000;">//</span><span style="color: #008000;"> 6 提交</span>
        job.waitForCompletion(<span style="color: #0000ff;">true</span><span style="color: #000000;">);
    }
}</span></pre>
</div>
<p>&nbsp;</p></div>
		</div>
	</div>
</div>

</body>
</html>
